#####################################
## "Backyard politics in Foreign Aid"
## William Christiansen
## Tobias Heinrich
## Timothy Peterson
#####################################

## Prepares the MTurk survey output (NIMBY data)
## for analysis


## Read raw inputs
##################
## MTurk export; fields are separated by "|"
nimby <- read.csv(
  "data/MTurk data.csv",
  sep = "|",
  stringsAsFactors = FALSE
)

## Senator table: the first data row is discarded and the columns are
## renamed. NOTE(review): `senators` is not referenced again in this file.
senators <- read.csv(
  "data/Senators raw.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
senators <- senators[-1, ]
colnames(senators) <- c("State", "Name", "Party")

## Y1: Rating of USAID package
## Y2: Feeling thermometer for senator
## M1: Contracts benefit friends/family?
## M2: Contracts benefit state you live in?
## M3: Contracts improve U.S. economy
## M4: Contracts improve U.S. security
## M5: Contracts benefit elites more
## M6: Moral imperative to help


## Setting up
#############
## Keep only respondents whose state is one of the nine listed here
nimby <- nimby[
  nimby$user_state %in% c("NJ", "CA", "FL", "MA", "MD", "NC", "NY", "TX", "VA"),
]
nimby$Age <- 2016 - nimby$user_birthyr

## Columns to keep, written as new_name = "raw_name"; the order below is
## the column order of the resulting data frame
column_map <- c(
  Age              = "Age",
  Gender           = "user_gender",
  State            = "user_state",
  Local            = "isLocalRecord_2",
  Y1               = "p3_rating",
  Costs            = "p3_costs",
  Change           = "p3_cutOrIncrease",
  MentionSenator   = "p3_mentionSenator",
  M1               = "p3_aid_q2_1",
  M2               = "p3_aid_q2_2",
  M3               = "p3_aid_q2_3",
  M4               = "p3_aid_q2_4",
  M5               = "p3_aid_q2_5",
  M6               = "p3_aid_q2_6",
  ShownSenatorName = "senatorNameFull_2",
  ShownState       = "statename_2",
  LivedHere        = "p4_years_lived",
  Education        = "user_edu",
  user_zip         = "user_zip",
  CallHome         = "p4_call_home",
  Ideology         = "p4_ideology",
  Transnational    = "p4_developing_countries",
  GreatestCountry  = "p4_greatest_country",
  Y2a              = "p4_senator_1",
  Y2b              = "p4_senator_2",
  Y2a_senator      = "p4_senator_1_name",
  Y2b_senator      = "p4_senator_2_name"
)
nimby <- nimby[, unname(column_map)]
colnames(nimby) <- names(column_map)
rm(column_map)

## Blank out the shown senator's name where MentionSenator is 0
nimby$ShownSenatorName[nimby$MentionSenator == 0] <- ""

## Recode to 0/1 indicators
nimby$Gender <- ifelse(nimby$Gender == "f", 1, 0)
nimby$Education_hi <- ifelse(nimby$Education >= 5, 1, 0)
nimby$Education_lo <- ifelse(nimby$Education <= 3, 1, 0)
nimby$Education <- NULL
nimby$Change <- ifelse(nimby$Change == "increase", "Increase", "Cut")

## Each of these becomes 1 where the raw answer equals 1 and 0 otherwise
indicator_cols <- c(
  paste0("M", 1:6), "CallHome", "Transnational", "GreatestCountry"
)
nimby[indicator_cols] <- lapply(
  nimby[indicator_cols],
  function(answer) ifelse(answer == 1, 1, 0)
)
rm(indicator_cols)

## Years lived here as a share of age, capped at 1
nimby$LivedHere <- with(
  nimby,
  ifelse(LivedHere / Age > 1, 1, LivedHere / Age)
)

## Rescale: a rating of 1 maps to 0 and a rating of 10 maps to 100
nimby$Y1 <- (nimby$Y1 - 1) / 9 * 100

## Merge in Zillow data, impute
###############################
## Only the zip code (RegionName) and the 2016-09 column of each Zillow
## file are used.
z_value <- read.csv("data/Zip_MedianValuePerSqft_AllHomes.csv")
z_increase <- read.csv("data/Zip_PctOfHomesIncreasingInValues_AllHomes.csv")
z_value <- z_value[, c("RegionName", "X2016.09")]
z_increase <- z_increase[, c("RegionName", "X2016.09")]
colnames(z_value) <- c("user_zip", "HousingMed")
colnames(z_increase) <- c("user_zip", "HousingPctIncrease")

## Left joins: respondents without a Zillow record are kept with NA
nimby <- merge(nimby, z_increase, by = "user_zip", all.x = TRUE)
nimby <- merge(nimby, z_value, by = "user_zip", all.x = TRUE)

## Fill the missing entries of one column with random-forest predictions.
##
## data:       data frame holding `target` and all `predictors`
## target:     name of the column to impute
## predictors: names of the columns used as features, in the order they
##             are handed to randomForest
##
## Returns `data` with the NA entries of `target` replaced; rows whose
## target is observed are left untouched. If nothing is missing the data
## are returned unchanged (no forest is fitted), because there would be no
## rows to predict for.
##
## NOTE(review): the forest is stochastic and no seed is set in this file,
## so imputed values differ between runs unless the caller fixes a seed.
impute_with_forest <- function(data, target, predictors) {
  observed <- !is.na(data[[target]])
  if (all(observed)) {
    return(data)
  }
  forest <- randomForest::randomForest(
    x = data[observed, predictors],
    y = data[[target]][observed]
  )
  ## Only the predictor columns are passed on, so missing values in
  ## unrelated columns cannot interfere with prediction
  data[[target]][!observed] <- predict(
    object = forest,
    newdata = data[!observed, predictors]
  )
  data
}

lead_predictors <- c(
  "Age", "Gender", "Transnational", "GreatestCountry", "Education_lo",
  "Education_hi", "Ideology", "user_zip", "Y1", "Y2a", "Y2b"
)
tail_predictors <- c(paste0("M", 1:6), "LivedHere")

## Fill in missingness for Zillow Housing price increase
nimby <- impute_with_forest(
  nimby,
  target = "HousingPctIncrease",
  predictors = c(lead_predictors, tail_predictors)
)
## Fill in missingness for Zillow Housing value data; the (now complete)
## price-increase column serves as an additional predictor
nimby <- impute_with_forest(
  nimby,
  target = "HousingMed",
  predictors = c(lead_predictors, "HousingPctIncrease", tail_predictors)
)
rm(lead_predictors, tail_predictors)

## Housing value enters the analysis data on the log scale
nimby$HousingMed <- log(nimby$HousingMed)


## Treatments
## T0: Local is 0
## T1: Local is 1 and MentionSenator is 0
## T2: Local is 1 and MentionSenator is 1
nimby$T0 <- with(nimby, ifelse(Local == 0, 1, 0))
nimby$T1 <- with(nimby, ifelse(Local == 1 & MentionSenator == 0, 1, 0))
nimby$T2 <- with(nimby, ifelse(Local == 1 & MentionSenator == 1, 1, 0))
## Single code per respondent: 0, 1 or 2 for T0, T1, T2 respectively
nimby$Treatment <- with(nimby, T0 * 0 + T1 * 1 + T2 * 2)


## Make d1
## One row per respondent: nimby without the two senator ratings
## (Y2a, Y2b) and without the MentionSenator and Local columns
##########################################
d1 <- nimby[
  ,
  !(colnames(nimby) %in% c("Y2a", "Y2b", "MentionSenator", "Local")),
  drop = FALSE
]



## Make d2
## Dataset contains two rows per respondent,
## one record per Senator-respondent
############################################

## Expand respondent-level data to one row per respondent-senator pair.
##
## resp: respondent-level data frame with (at least) the columns Y1, Y2a,
##       Y2b, Y2a_senator, Y2b_senator, ShownSenatorName, Treatment,
##       MentionSenator and Local.
##
## Returns a data frame with two consecutive rows per respondent. The
## first row carries Y2a / Y2a_senator and the second Y2b / Y2b_senator,
## stored as Y2 and SenatorName. ID numbers the respondents in row order.
## Treatment is re-coded per row: for respondents in arm 2 only the row of
## the senator whose name was shown keeps the value 2, the other row is
## set to 1. T0, T1 and T2 are recomputed from the row-level Treatment.
##
## Stops if Treatment has missing values. Warns if an arm-2 respondent was
## shown a name that matches neither rated senator; in that case both rows
## keep Treatment = 2.
make_senator_rows <- function(resp) {
  if (anyNA(resp$Treatment)) {
    stop("Treatment contains missing values; cannot build d2.",
         call. = FALSE)
  }

  n_resp <- nrow(resp)
  pair_id <- rep(seq_len(n_resp), each = 2)
  is_first <- rep(c(TRUE, FALSE), times = n_resp)

  long <- resp[pair_id, colnames(resp) != "Y1", drop = FALSE]
  row.names(long) <- NULL

  ## c(rbind(a, b)) interleaves the two vectors: a[1], b[1], a[2], b[2], ...
  long$Y2 <- c(rbind(resp$Y2a, resp$Y2b))
  long$SenatorName <- c(rbind(resp$Y2a_senator, resp$Y2b_senator))

  ## %in% TRUE turns NA comparisons into FALSE
  shown_is_a <- (long$ShownSenatorName == long$Y2a_senator) %in% TRUE
  shown_is_b <- (long$ShownSenatorName == long$Y2b_senator) %in% TRUE
  senator_arm <- long$Treatment == 2

  unmatched <- senator_arm & !shown_is_a & !shown_is_b
  if (any(unmatched)) {
    warning(
      sprintf(
        paste(
          "%d respondent(s) in the senator treatment were shown a name",
          "matching neither rated senator; both rows keep Treatment = 2."
        ),
        sum(unmatched) / 2
      ),
      call. = FALSE
    )
  }

  ## The row of the senator who was NOT shown drops from 2 to 1. If the
  ## shown name equals both rated names, the second row counts as shown.
  other_shown <- ifelse(is_first, shown_is_b, shown_is_a & !shown_is_b)
  long$Treatment[senator_arm & other_shown] <- 1

  long$ID <- pair_id
  long$T0 <- ifelse(long$Treatment == 0, 1, 0)
  long$T1 <- ifelse(long$Treatment == 1, 1, 0)
  long$T2 <- ifelse(long$Treatment == 2, 1, 0)

  no_longer_needed <- c(
    "Y2a", "Y2b", "Y2a_senator", "Y2b_senator", "MentionSenator", "Local"
  )
  long[, !(colnames(long) %in% no_longer_needed), drop = FALSE]
}

d2 <- make_senator_rows(nimby)

## Save to disk
## Each data set is written to its own .Rdata file
save(list = "d1", file = "output/data_d1.Rdata")
save(list = "d2", file = "output/data_d2.Rdata")




